from datetime import datetime, timedelta
import math
import os
import time
import json
from plotly.offline import init_notebook_mode, iplot
from bokeh.io import output_notebook
from bokeh.models import FuncTickFormatter, ColumnDataSource
from bokeh.plotting import figure, output_file, show
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
# bokeh: configure output so that figures are displayed inline in the notebook
# https://docs.bokeh.org/en/latest/docs/user_guide/jupyter.html#userguide-jupyter-notebook
output_notebook()
# load data: check if we have a fresh local version (8 hours ago or newer)
# if we don't have a fresh version, pull down a remote csv
def download_data_source(
    local_path,
    data_source="https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv",
    timeout=30,
):
    """Download the latest county-level CSV and save it to ``local_path``.

    Args:
        local_path: Destination file path; replaced only after the whole
            download has succeeded.
        data_source: URL to fetch. Defaults to the NYT ``us-counties.csv``.
        timeout: Seconds passed to ``requests.get`` as the timeout, so a
            stalled server cannot hang the notebook forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    import requests

    # Stream into a sibling temp file first: an interrupted download must not
    # leave a truncated file with a fresh mtime at ``local_path``, because the
    # caller decides freshness from that mtime.
    partial_path = local_path + ".part"
    try:
        # The context manager releases the streamed connection when done.
        with requests.get(data_source, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            with open(partial_path, "wb") as openfile:
                for block in resp.iter_content(64 * 1024):
                    openfile.write(block)
        os.replace(partial_path, local_path)
    finally:
        # Only present here when the download failed part-way through.
        if os.path.exists(partial_path):
            os.remove(partial_path)
# Use the local CSV when it exists and is younger than 8 hours; otherwise
# (missing or stale) download a new copy before loading it.
local_path = os.path.join(".", "us-counties.csv")
if os.path.isfile(local_path):
    mtime = int(os.stat(local_path).st_mtime)
    now = int(time.time())
    age = (now - mtime) / 60 / 60  # seconds -> hours
    if age >= 8:
        print("Local Data: is stale - downloading")
        download_data_source(local_path)
else:
    print("Local Data: not found - downloading")
    download_data_source(local_path)
print("Local Data: loading from file")
# fips is read as a string so the codes are kept exactly as written in the CSV
df = pd.read_csv(local_path, dtype={"fips": str})
# Filter to Texas only. The explicit copy makes df an independent frame, so
# the columns assigned to it later do not trigger SettingWithCopyWarning.
df = df[df.state == "Texas"].copy()
df.head()
# download geojson
# https://github.com/TNRIS/tx.geojson/blob/master/counties/tx_counties.geojson
def download_geojson(
    local_path,
    data_source="https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json",
    timeout=30,
):
    """Download the county-boundary GeoJSON and save it to ``local_path``.

    Args:
        local_path: Destination file path; replaced only after the whole
            download has succeeded.
        data_source: URL to fetch. Defaults to plotly's
            ``geojson-counties-fips.json`` dataset.
        timeout: Seconds passed to ``requests.get`` as the timeout, so a
            stalled server cannot hang the notebook forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    import requests

    # Stream into a sibling temp file first: the caller only checks whether
    # ``local_path`` exists, so a truncated file left behind by a failed
    # download would be reused forever.
    partial_path = local_path + ".part"
    try:
        # The context manager releases the streamed connection when done.
        with requests.get(data_source, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            with open(partial_path, "wb") as openfile:
                for block in resp.iter_content(64 * 1024):
                    openfile.write(block)
        os.replace(partial_path, local_path)
    finally:
        # Only present here when the download failed part-way through.
        if os.path.exists(partial_path):
            os.remove(partial_path)
geojson_local_path = os.path.join(".", "geojson-counties-fips.json")
# The boundaries file is fetched only when no local copy exists yet.
if not os.path.isfile(geojson_local_path):
    print("Geojson: downloading")
    download_geojson(geojson_local_path)
# Parse straight from the file handle.
with open(geojson_local_path, "r") as openfile:
    geojson = json.load(openfile)
print("Geojson: loaded data")
# Show the first 100 characters of the pretty-printed document as a preview.
geojson_preview = json.dumps(geojson, indent=2, sort_keys=True)
print(geojson_preview[:100])
# https://hhs.texas.gov/sites/default/files/documents/about-hhs/hhs-regional-map.pdf
from texas_doh_regions import (
region1,
region2,
region3,
region4,
region5,
region6,
region7,
region8,
region9,
region10,
)
# Maps a health-region key to the collection of counties imported for it.
# Every key follows the "region_<n>" pattern: get_region() replaces the
# underscore with a space and title-cases the key, so "region_10" is shown as
# "Region 10" (the previous "region1_0" key rendered as "Region1 0").
region_county_dict = {
    "region_1": region1,
    "region_2": region2,
    "region_3": region3,
    "region_4": region4,
    "region_5": region5,
    "region_6": region6,
    "region_7": region7,
    "region_8": region8,
    "region_9": region9,
    "region_10": region10,
}
def get_region(county):
    """Return the display name of the first region that lists ``county``.

    The matching ``region_county_dict`` key has its underscores replaced by
    spaces and is title-cased. Returns ``None`` when no region contains the
    county.
    """
    display_names = (
        key.replace("_", " ").title()
        for key, members in region_county_dict.items()
        if county in members
    )
    return next(display_names, None)
# Tag every row with the display name of its health region (None if unmapped).
df["region"] = df.county.apply(get_region)
df.head()
# import county population data
with open("tx-county-population--modified.json", "r") as openfile:
    tx_county_data = json.load(openfile)
# Strict lookup on purpose: a county missing from the population file raises
# KeyError instead of silently producing an empty population.
df["population"] = df.county.map(lambda county: tx_county_data[county]["Pop"])
# Column-wise arithmetic instead of a row-by-row apply: it is much faster and
# a zero denominator yields NaN/inf in that row rather than an exception.
df["cases_pop"] = df.cases / df.population
df["deaths_pop"] = df.deaths / df.population
df["death_rate"] = df.deaths / df.cases
df.head()
# Choropleth Maps
#
# Constants
tx_center = {"lat": 31.169621, "lon": -99.683617}
# Keyword arguments shared by every map below.
default_cloropleth_kwargs = dict(
    geojson=geojson,
    locations="fips",
    color_continuous_scale="Plasma",
    mapbox_style="carto-positron",
    zoom=4.2,
    center=tx_center,
    opacity=0.5,
)
# One entry per map, in display order: (column to colour by, legend label,
# columns shown on hover).
# NOTE(review): "% Infected" and "Death Count" label per-capita ratios
# (cases / population and deaths / population) - confirm the intended wording.
_choropleth_specs = [
    ("cases", "cases", ["county", "cases", "population", "cases_pop"]),
    ("cases_pop", "% Infected", ["county", "cases", "population", "cases_pop"]),
    (
        "deaths_pop",
        "Death Count",
        ["county", "deaths", "population", "deaths_pop"],
    ),
    (
        "death_rate",
        "Mortality Rate",
        ["county", "death_rate", "deaths", "cases", "population"],
    ),
]
for _column, _label, _hover in _choropleth_specs:
    # Series.min()/max() skip NaN, so a missing ratio cannot corrupt the
    # colour range the way the builtin min()/max() over raw values could.
    _min, _max = df[_column].min(), df[_column].max()
    fig = px.choropleth_mapbox(
        df,
        color=_column,
        range_color=(_min, _max),
        labels={_column: _label},
        hover_data=_hover,
        **default_cloropleth_kwargs
    )
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    fig.show()
def function(df):
    """Build a stacked-area Bokeh figure of total cases and deaths per date.

    Args:
        df: DataFrame with ``date``, ``cases`` and ``deaths`` columns. Rows
            sharing a ``date`` are summed together.

    Returns:
        The configured Bokeh figure; the caller is responsible for showing it.
    """
    # Aggregate only the two plotted measures. Selecting "deaths" explicitly
    # matters: previously the summed "county" column was relabelled and
    # plotted as if it were the death count.
    daily = df[["date", "cases", "deaths"]].groupby(by="date").sum()
    # Real timestamps, so both ends of x_range and the x data share one type.
    daily.index = pd.to_datetime(daily.index)
    first_day, last_day = daily.index.min(), daily.index.max()

    p = figure(
        title="Texas: Cases by Health Region",
        x_axis_label="Date",
        y_axis_label="# of Cases & Deaths",
        # 20% headroom above the highest case total
        y_range=[0, int(daily["cases"].max() * 1.20)],
        # two extra days of padding on the right-hand side
        x_range=[first_day, last_day + timedelta(days=2)],
        plot_width=880,
        tools="pan,wheel_zoom,box_zoom,reset",
    )
    # add actual values as an area graph
    source = ColumnDataSource(
        data=dict(x=daily.index, cases=daily["cases"], deaths=daily["deaths"])
    )
    p.varea_stack(["deaths", "cases"], x="x", color=("red", "lightblue"), source=source)
    # Ticks arrive in the browser as epoch milliseconds. The UTC getters keep
    # the label on the same calendar day as the data regardless of the
    # viewer's timezone, and getUTCMonth() is zero-based, hence the "+ 1".
    p.xaxis.formatter = FuncTickFormatter(
        code="""
        let date = new Date(tick);
        return `${date.getUTCDate()}-${date.getUTCMonth() + 1}-${date.getUTCFullYear()}`
        """
    )
    return p
# build the cases/deaths figure from the Texas data and display it
show(function(df))